*****************************************************************************************************************
*     UTILITY TO ANALYSE RPAQ DATA, ENTERED AND CLEANED ACCORDING TO MRC EPI UNIT GUIDELINES  		 			*
*	  NOTE: This script uses median values within dataset for missing imputation!								*
*																												*
*	Authors: Robert Scott, Marcel den Hoed, Kate Westgate, Soren Brage (MRC Epidemiology Unit, Cambridge, UK)	*
*	version 1.5																									*
*	Date 11/09/2013 																							*
*****************************************************************************************************************

/*
Version history:
v1.3 - Incorporated version and template differences for Fenland R6 - June 2013
v1.4 - Specifically for Fenland R6.1 (merge in R6 dataset to use for median imputation when missing values) - July 2013
v1.5 - Generic version for web (removed Fenland specific information) & added housekeeping stop if template# not recognised - Oct 2013
*/

*Ensure the data-entry .csv file has been cleaned, that every variable (other than StudyID & template) carries the postfix _CLEAN, and that it has been saved in .dta format (the INPUT_FILE read below)

* Session set-up: empty memory, switch off output paging, close any open log.
clear
set more off
set mem 600m
capture log close

**************************************************************************
* Enter folder and input file names
* INPUT_FILE is given without extension; it is opened below as a .dta file.
local FOLDER "V:\P5_PhysAct\PA_Questionnaires\FOR WEB\RPAQ"
local INPUT_FILE "RPAQEntryTemplate"
local OUTPUT_FILE "`INPUT_FILE'_PROCESSED"

**************************************************************************
* Move to the working folder and load the data-entry file.
cd "`FOLDER'"
use "`INPUT_FILE'.dta", clear

*Check for QVersion and template
* Housekeeping stop: the script halts unless QVersion_CLEAN and template both exist,
* are filled in for every record, and template only takes the values 1 or 2.
capture confirm variable QVersion_CLEAN template
* Keep the return code straight away so the later test does not depend on _rc
* still holding the result of this particular capture.
local vars_rc = _rc
display `vars_rc'

* Defaults that pass the test; they are only overwritten when the variables exist.
* (Counting on a variable that is absent would abort with "variable not found"
* before the explanatory message below could be shown.)
local missing_templ = 0
local missing_QV = 0
local templatevalue = 0
local templatemin = 1
if `vars_rc' == 0 {
	count if missing(template)
	local missing_templ = r(N)
	count if missing(QVersion_CLEAN)
	local missing_QV = r(N)
	su template
	* r(max) and r(min) are missing when template is a string variable or the file is empty;
	* missing compares as larger than 2, so that case also triggers the stop.
	local templatevalue = r(max)
	local templatemin = r(min)
}

if `vars_rc' != 0 | `missing_templ' > 0 | `missing_QV' > 0 | `templatevalue' > 2 | `templatemin' < 1 {
	di in red "STOP"
	di in yellow "THIS FILE DOES NOT HAVE FULLY COMPLETED QVersion_CLEAN & template VARIABLES - ADD TO INPUT FILE TO DEFINE VERSION OF QUESTIONNAIRE & TEMPLATE FOR ENTRY USED"
	di in yellow "Template information (relates to coding structure used during data entry for section C (Recreation)) If used coding 1-7 use Template =1, if coded 1-8 (omitting 2) use Template = 2)"
	* Terminate the do-file with a non-zero return code.
	exit 198
	*br StudyID QVersion_CLEAN template
}

* Put the ID first, followed by the cleaned entry variables.
order StudyID *_CLEAN*

* * * * * * * * PROCESSING * * * * * * *

* Sample-size guard: count the records, keep the count in local macro N,
* and halt the script when there are fewer than 1000.
* NOTE(review): N may also be read further down the file - confirm before renaming or removing it.
count
local N=r(N)
	if `N'<1000 {
		* This dataset includes fewer than 1000 individuals.
		* Please note that in many cases missing values will be imputed using in-sample median values for those particular variables.
		* If you are happy with this approach, just comment out the stop line below (add "*" at beginning of line) and rerun the script.
		* stop is not a built-in Stata command (unless a user-written stop.ado is installed);
		* the resulting "unrecognized command" error is what halts the do-file here.
		stop
			}
		

	
*replace data entered as invalid (starts with an opening bracket) with system missing value
* For every entry variable listed below:
*   1. stop with Stata's own "variable not found" error if the column is absent;
*   2. if it is a string column, blank every value whose first character is "(" and
*      convert the column to numeric with destring;
*   3. stop with an explicit message if the column is still not numeric, because every
*      variable in this list is used in numeric comparisons further down.
foreach var in QVersion_CLEAN Gettingabout_CLEAN  ///
 Mediaweekdaypre6pm_CLEAN Mediaweekdaypost6pm_CLEAN Mediaweekendpre6pm_CLEAN Mediaweekendpost6pm_CLEAN ///
 Computerweekdaypre6pm_CLEAN Computerweekdaypost6pm_CLEAN Computerweekendpre6pm_CLEAN Computerweekendpost6pm_CLEAN ///
 Stairweekday_CLEAN Stairweekend_CLEAN Work4wkago_CLEAN Work3wkago_CLEAN Work2wkago_CLEAN Work1wkago_CLEAN Worktype_CLEAN ///
 Wrkmiles_CLEAN Wrkkms_CLEAN Wrktimesperweek_CLEAN Wrkbycar_CLEAN Wrkbypubtran_CLEAN Wrkbybicycle_CLEAN Wrkbyfoot_CLEAN ///
 swimComp_CLEAN swimCompHr_CLEAN swimCompMin_CLEAN swimLeis_CLEAN swimLeisHr_CLEAN swimLeisMin_CLEAN ///
 backPackMountainClimb_CLEAN backPackMountainClimbHr_CLEAN backPackMountainClimbMin_CLEAN walkPleasure_CLEAN walkPleasureHr_CLEAN walkPleasureMin_CLEAN ///
 cyclingRacingRough_CLEAN cyclingRacingRoughHr_CLEAN cyclingRacingRoughMin_CLEAN cyclePleasure_CLEAN cyclePleasureHr_CLEAN cyclePleasureMin_CLEAN ///
 mowing_CLEAN mowingHr_CLEAN mowingMin_CLEAN waterLawn_CLEAN waterLawnHr_CLEAN waterLawnMin_CLEAN ///
 heavyGardening_CLEAN heavyGardeningHr_CLEAN heavyGardeningMin_CLEAN weedPrune_CLEAN weedPruneHr_CLEAN weedPruneMin_CLEAN ///
 dIY_CLEAN dIYHr_CLEAN dIYMin_CLEAN aerobicsHigh_CLEAN aerobicsHighHr_CLEAN aerobicsHighMin_CLEAN ///
 aerobicsOther_CLEAN aerobicsOtherHr_CLEAN aerobicsOtherMin_CLEAN exerciseWeights_CLEAN exerciseWeightsHr_CLEAN exerciseWeightsMin_CLEAN ///
 conditionExercise_CLEAN conditionExerciseHr_CLEAN conditionExerciseMin_CLEAN floorExercise_CLEAN floorExerciseHr_CLEAN floorExerciseMin_CLEAN ///
 dancing_CLEAN dancingHr_CLEAN dancingMin_CLEAN compRun_CLEAN compRunHr_CLEAN compRunMin_CLEAN jog_CLEAN jogHr_CLEAN jogMin_CLEAN ///
 bowling_CLEAN bowlingHr_CLEAN bowlingMin_CLEAN tennisBadminton_CLEAN tennisBadmintonHr_CLEAN tennisBadmintonMin_CLEAN  ///
 squash_CLEAN squashHr_CLEAN squashMin_CLEAN tableTennis_CLEAN tableTennisHr_CLEAN tableTennisMin_CLEAN golf_CLEAN golfHr_CLEAN golfMin_CLEAN ///
 footballRugbyHockey_CLEAN footballRugbyHockeyHr_CLEAN footballRugbyHockeyMin_CLEAN cricket_CLEAN cricketHr_CLEAN cricketMin_CLEAN ///
 rowing_CLEAN rowingHr_CLEAN rowingMin_CLEAN netVolleyBasketBall_CLEAN netVolleyBasketBallHr_CLEAN netVolleyBasketBallMin_CLEAN ///
 huntingShootingFish_CLEAN huntingShootingFishHr_CLEAN huntingShootingFishMin_CLEAN horseBased_CLEAN horseBasedHr_CLEAN horseBasedMin_CLEAN ///
 snookerBillardsDarts_CLEAN snookerBillardsDartsHr_CLEAN snookerBillardsDartsMin_CLEAN ///
 musicalInstrumentSing_CLEAN musicalInstrumentSingHr_CLEAN musicalInstrumentSingMin_CLEAN iceSkating_CLEAN iceSkatingHr_CLEAN iceSkatingMin_CLEAN ///
 sailingWindsurfBoat_CLEAN sailingWindsurfBoatHr_CLEAN sailingWindsurfBoatMin_CLEAN combatsSports_CLEAN combatsSportsHr_CLEAN combatsSportsMin_CLEAN ///
 Paidemployment_CLEAN {

	* An absent column is an input-file problem: let Stata report it by name.
	confirm variable `var'

	* Only string columns can hold bracketed entries; numeric columns need no work.
	capture confirm string variable `var'
	if _rc == 0 {
		replace `var' = "" if substr(`var',1,1) == "("
		destring `var', replace
	}

	* destring leaves the column as a string when other non-numeric text remains.
	capture confirm numeric variable `var'
	if _rc != 0 {
		di in red "`var' still holds non-numeric text after blanking bracketed entries - clean the input file and rerun"
		exit 109
	}

	}

*****************************
/* Derivation of variables */
*****************************
* MISSINGA, data on getting about, TV-viewing, computer use and stairclimbing
* MISSINGA = 1 only when every one of the eleven section-A items is coded below 1;
* a single item at 1 or above (or system missing, which compares as large) gives MISSINGA = 2.
gen MISSINGA = 1
foreach item in Gettingabout_CLEAN ///
Mediaweekdaypre6pm_CLEAN Mediaweekdaypost6pm_CLEAN Mediaweekendpre6pm_CLEAN Mediaweekendpost6pm_CLEAN ///
Computerweekdaypre6pm_CLEAN Computerweekdaypost6pm_CLEAN Computerweekendpre6pm_CLEAN Computerweekendpost6pm_CLEAN ///
Stairweekday_CLEAN Stairweekend_CLEAN {
	* one item that is not below 1 is enough to mark the section as not missing
	replace MISSINGA = 2 if !(`item' < 1)
}


/* MISSINGJOB AND EMPLOYED */
* MISSINGJOB = 2 is the default (no missingness). It is set to 1 when
*   - all four weekly work-hour items are negative and no work type is given, or
*   - the questionnaire is version 8 (no data on work hours or employment status,
*     but it does have worktype data) and no work type is given.
* MISSINGJOB = 1 therefore also includes individuals who report being (un)employed
* and who record no work activities, i.e. they are still seen as missing.
local no_worktype "(Worktype_CLEAN < 1 | Worktype_CLEAN == .)"
gen MISSINGJOB = cond( ///
	(Work4wkago_CLEAN < 0 & Work3wkago_CLEAN < 0 & Work2wkago_CLEAN < 0 & Work1wkago_CLEAN < 0 & `no_worktype') ///
	| (QVersion_CLEAN == 8 & `no_worktype'), 1, 2)

* EMPLOYED = 1 when Paidemployment_CLEAN is 2 or job data are present (this takes precedence);
* EMPLOYED = 2 when Paidemployment_CLEAN is 1 and job data are missing; otherwise system missing.
* NOTE(review): the original comment reads "EMPLOYED = 1 if they are in employment" -
* confirm the coding of Paidemployment_CLEAN against the questionnaire.
gen EMPLOYED = cond(Paidemployment_CLEAN == 2 | MISSINGJOB == 2, 1, cond(Paidemployment_CLEAN == 1, 2, .))


/* MISSINGCOMMUT */
* 1 when the trips-per-week item is negative and all four travel-mode items are below 1; otherwise 2.
gen MISSINGCOMMUT = cond(Wrktimesperweek_CLEAN < 0 & Wrkbycar_CLEAN < 1 & Wrkbypubtran_CLEAN < 1 & ///
	Wrkbybicycle_CLEAN < 1 & Wrkbyfoot_CLEAN < 1, 1, 2)

/* MISSINGC */
* Section C (recreation) counts as missing (MISSINGC = 1) only when, for every one of the
* 35 activities, the frequency item is below 1 and both the hour and minute items are negative.
* Any activity breaking that pattern gives MISSINGC = 2.
gen MISSINGC = 1
foreach x in swimComp swimLeis backPackMountainClimb walkPleasure cyclingRacingRough cyclePleasure mowing waterLawn heavyGardening ///
weedPrune dIY aerobicsHigh aerobicsOther exerciseWeights conditionExercise floorExercise dancing compRun jog bowling tennisBadminton ///
squash tableTennis golf footballRugbyHockey cricket rowing netVolleyBasketBall huntingShootingFish horseBased snookerBillardsDarts ///
musicalInstrumentSing iceSkating sailingWindsurfBoat combatsSports {
	replace MISSINGC = 2 if !(`x'_CLEAN < 1 & `x'Hr_CLEAN < 0 & `x'Min_CLEAN < 0)
}

/* MISSING */
* Overall flag: 1 when job, commuting and recreation sections are all missing, else 2.
* Section A (MISSINGA) is not part of this flag.
gen MISSING = cond(MISSINGJOB == 1 & MISSINGCOMMUT == 1 & MISSINGC == 1, 1, 2)

/* CALCULATION OF DAILY DURATIONS */
* GETABOUT copies Gettingabout_CLEAN, except that a value below 1 becomes
* 0 when section A has data (MISSINGA == 2) and system missing when it has none (MISSINGA == 1).
gen GETABOUT = cond(Gettingabout_CLEAN < 1, ///
	cond(MISSINGA == 2, 0, cond(MISSINGA == 1, ., Gettingabout_CLEAN)), ///
	Gettingabout_CLEAN)

/* TO ASSIGN THE MEDIAN FOR TV WHEN MISSING */
* Four TV items (weekday before/after 6pm, weekend before/after 6pm) are turned into TVDUR1-TVDUR4.
* The fill-in values below are hard-coded constants, not computed from this dataset
* (presumably medians from an earlier reference dataset - TODO confirm).
local tvq1 Mediaweekdaypre6pm_CLEAN
local tvq2 Mediaweekdaypost6pm_CLEAN
local tvq3 Mediaweekendpre6pm_CLEAN
local tvq4 Mediaweekendpost6pm_CLEAN
* value used when the item is below 1 ('Not completed') although section A has data
local tvfill1 0
local tvfill2 7.5
local tvfill3 1
local tvfill4 5
* values assigned to answer categories 1 to 6 (weekday items, then weekend items)
local tvhrs1 0 2.5 7.5 12.5 17.5 22.5
local tvhrs2 `tvhrs1'
local tvhrs3 0 1 3 5 7 9
local tvhrs4 `tvhrs3'

forvalues slot = 1/4 {
	* starts as system missing; it stays missing when section A is missing altogether
	gen TVDUR`slot' = .
	replace TVDUR`slot' = `tvfill`slot'' if `tvq`slot'' < 1 & MISSINGA == 2
	forvalues cat = 1/6 {
		local hrs : word `cat' of `tvhrs`slot''
		replace TVDUR`slot' = `hrs' if `tvq`slot'' == `cat'
	}
}
* the four parts are summed and divided by 7 to give a per-day figure
gen DURTV = (TVDUR1+TVDUR2+TVDUR3+TVDUR4)/7

/* TO ASSIGN THE MEDIAN FOR COMPUTER WHEN MISSING */
* Four computer-use items (weekday before/after 6pm, weekend before/after 6pm) become COMPDUR1-COMPDUR4.
* The fill-in values below are hard-coded constants, not computed from this dataset
* (presumably medians from an earlier reference dataset - TODO confirm).
local pcq1 Computerweekdaypre6pm_CLEAN
local pcq2 Computerweekdaypost6pm_CLEAN
local pcq3 Computerweekendpre6pm_CLEAN
local pcq4 Computerweekendpost6pm_CLEAN
* value used when the item is below 1 ('Not completed') although section A has data
local pcfill1 0
local pcfill2 2.5
local pcfill3 1
local pcfill4 1
* values assigned to answer categories 1 to 6 (weekday items, then weekend items)
local pchrs1 0 2.5 7.5 12.5 17.5 22.5
local pchrs2 `pchrs1'
local pchrs3 0 1 3 5 7 9
local pchrs4 `pchrs3'

forvalues slot = 1/4 {
	* starts as system missing; it stays missing when section A is missing altogether
	gen COMPDUR`slot' = .
	replace COMPDUR`slot' = `pcfill`slot'' if `pcq`slot'' < 1 & MISSINGA == 2
	forvalues cat = 1/6 {
		local hrs : word `cat' of `pchrs`slot''
		replace COMPDUR`slot' = `hrs' if `pcq`slot'' == `cat'
	}
}
* the four parts are summed and divided by 7 to give a per-day figure
gen DURCOMP = (COMPDUR1+COMPDUR2+COMPDUR3+COMPDUR4)/7

/* FLIGHTS OF STAIRS */
* STAIRFLIGHT1 (weekday item) and STAIRFLIGHT2 (weekend item) hold the number of flights
* assigned to answer categories 1 to 6. When an item is not completed but section A has
* data (MISSINGA == 2) a hard-coded fill-in value is used (40 weekday, 16 weekend);
* when section A is missing altogether the result stays system missing.
gen STAIRFLIGHT1 = . if Stairweekday_CLEAN < 1 & MISSINGA == 1 /* 'Not completed'*/
replace STAIRFLIGHT1 = 40 if Stairweekday_CLEAN < 1 & MISSINGA == 2 /* 'Not completed'*/
replace STAIRFLIGHT1 = 0 if Stairweekday_CLEAN == 1
replace STAIRFLIGHT1 = 15 if Stairweekday_CLEAN == 2
replace STAIRFLIGHT1 = 40 if Stairweekday_CLEAN == 3
replace STAIRFLIGHT1 = 65 if Stairweekday_CLEAN == 4
replace STAIRFLIGHT1 = 90 if Stairweekday_CLEAN == 5
replace STAIRFLIGHT1 = 115 if Stairweekday_CLEAN == 6
gen STAIRFLIGHT2 = . if Stairweekend_CLEAN < 1 & MISSINGA == 1 /* 'Not completed'*/
* Fix: this line used to test only for system missing, unlike the weekday item and the
* TV/computer items, so a weekend item coded below 1 was never given the fill-in value.
* Both the below-1 code and system missing are accepted now.
replace STAIRFLIGHT2 = 16 if (Stairweekend_CLEAN < 1 | Stairweekend_CLEAN == .) & MISSINGA == 2 /* 'Not completed'*/
replace STAIRFLIGHT2 = 0 if Stairweekend_CLEAN == 1
replace STAIRFLIGHT2 = 6 if Stairweekend_CLEAN == 2
replace STAIRFLIGHT2 = 16 if Stairweekend_CLEAN == 3
replace STAIRFLIGHT2 = 26 if Stairweekend_CLEAN == 4
replace STAIRFLIGHT2 = 36 if Stairweekend_CLEAN == 5
replace STAIRFLIGHT2 = 46 if Stairweekend_CLEAN == 6
*assign 10 seconds per flight of stairs: flights per week / 7 = flights per day,
*and 10 seconds is 1/360 of an hour, so the result is hours per day
gen DURSTAIRFLIGHT = ((STAIRFLIGHT1+STAIRFLIGHT2)/7)/360


/* JOB ACTIVITY */
* Negative codes in the four weekly work-hour items are turned into system missing.
foreach var in Work4wkago_CLEAN Work3wkago_CLEAN ///
Work2wkago_CLEAN Work1wkago_CLEAN {
	replace `var' = . if `var' < 0 
}

* Per-week copies (negative codes were already removed by the loop above).
forvalues i = 1/4 {
	gen DURATIONJOB`i' = Work`i'wkago_CLEAN 
}


* Total hours over the four weeks; rowtotal treats missing as 0, so the total is reset
* to missing when none of the four weeks was reported.
egen DURATIONJOB = rowtotal(DURATIONJOB1 DURATIONJOB2 DURATIONJOB3 DURATIONJOB4)
replace DURATIONJOB = . if DURATIONJOB1 ==. &  DURATIONJOB2 == . & DURATIONJOB3 == . & DURATIONJOB4 == .
* In-sample median of the four-week total among records with job data; r(p50) is used below.
su DURATIONJOB if MISSINGJOB == 2, detail
*version 8 had no work duration question. assigning median to those who answered the worktype question. 
replace DURATIONJOB = r(p50) if QVersion_CLEAN == 8 & MISSINGJOB == 2
*assign median where worktype is reported but no duration
replace DURATIONJOB = r(p50) if Worktype_CLEAN > 0 & Worktype_CLEAN < 5 & (DURATIONJOB <0 | DURATIONJOB ==.)
*if median not computable (r(p50) missing, so the line above leaves the value missing),
*assign 37hrs/wk where worktype is reported but no duration
* Fix: this fallback used to run before the median line, which made the median line unreachable.
replace DURATIONJOB = 4*37 if Worktype_CLEAN > 0 & Worktype_CLEAN < 5 & (DURATIONJOB <0 | DURATIONJOB ==.)

*generate average weekly hours over last 4 weeks
replace DURATIONJOB = DURATIONJOB/4
*generate average daily hours of work
gen DURJOB = DURATIONJOB/7 if DURATIONJOB > 0 & DURATIONJOB <= 84
replace DURJOB = 0 if DURATIONJOB == 0
*truncate to max 12hrs per day (84 hours per week)
replace DURJOB = 12 if DURATIONJOB > 84 & DURATIONJOB !=.



/* TRANSPORTATION ACTIVITY */
* Converts the four travel-to-work mode items into the fraction of journeys made by that mode:
*   1 'Always' = 1, 2 'Usually' = .75, 3 'Occasionally' = .25, 4 'Never or rarely' = 0.
* An item outside 1-4 ('NOT COMPLETED', including system missing) is set to 0 when the
* commuting section has data (MISSINGCOMMUT == 2) and is left missing otherwise.
* Fix: the flag is now written out as MISSINGCOMMUT; the previous spelling MISSINGCOMM only
* worked through variable-name abbreviation, which fails under "set varabbrev off" or when
* another variable starts with the same letters.
local src_CAR    Wrkbycar_CLEAN
local src_PUBLIC Wrkbypubtran_CLEAN
local src_CYCLE  Wrkbybicycle_CLEAN
local src_WALK   Wrkbyfoot_CLEAN
foreach mode in CAR PUBLIC CYCLE WALK {
	local q `src_`mode''
	gen FREQ`mode'INI = 0 if (`q' < 1 | `q' > 4) & MISSINGCOMMUT == 2 /* 'NOT COMPLETED'*/
	replace FREQ`mode'INI = 0 if `q' == 4 /* 'Never or rarely'*/
	replace FREQ`mode'INI = .25 if `q' == 3 /* 'Occasionally'*/
	replace FREQ`mode'INI = .75 if `q' == 2 /* 'Usually'*/
	replace FREQ`mode'INI = 1 if `q' == 1 /* 'Always'*/
}
* Sum of the four fractions; it exceeds 1 when several modes are reported as frequent.
gen FREQTOTAL = FREQCARINI + FREQPUBLICINI + FREQCYCLEINI + FREQWALKINI


* One-way distance to work in miles, taken from the miles item, the km item (x 0.62),
* or the mean of the two when both are given.
* Fix: system missing compares as larger than any number, so the tests for a positive value
* now also require a non-missing value. Previously a valid distance in one unit paired with a
* blank entry in the other unit produced a missing distance.
gen DISTWORKMILES = .
replace DISTWORKMILES = 0 if  Wrkmiles_CLEAN <=0 & Wrkkms_CLEAN <=0
replace DISTWORKMILES = (Wrkkms_CLEAN*0.62) if (Wrkkms_CLEAN >0 & Wrkkms_CLEAN < .) & (Wrkmiles_CLEAN <=0 | missing(Wrkmiles_CLEAN))
replace DISTWORKMILES = (Wrkmiles_CLEAN) if (Wrkmiles_CLEAN >0 & Wrkmiles_CLEAN < .) & (Wrkkms_CLEAN <=0 | missing(Wrkkms_CLEAN))
replace DISTWORKMILES = ((Wrkmiles_CLEAN +0.62*Wrkkms_CLEAN)/2) if (Wrkkms_CLEAN > 0 & Wrkkms_CLEAN < .) & (Wrkmiles_CLEAN > 0 & Wrkmiles_CLEAN < .)

*Truncation of total distance ****set maximum miles to 100miles for anyone claiming more than that as a distance
replace DISTWORKMILES = 100 if (DISTWORKMILES > 100 & DISTWORKMILES  != .)

* Miles per mode = distance x fraction of journeys, for modes used at least occasionally (>= 0.25).
* A missing fraction also satisfies >= 0.25, so those records end up with missing miles.
gen CARMILES = 0
replace CARMILES = DISTWORKMILES * FREQCARINI if FREQCARINI >= 0.25
gen PUBLICMILES = 0
replace PUBLICMILES = DISTWORKMILES * FREQPUBLICINI if FREQPUBLICINI  >= 0.25
gen CYCLEMILES = 0
replace CYCLEMILES = DISTWORKMILES * FREQCYCLEINI if FREQCYCLEINI >= 0.25
gen WALKMILES = 0 
replace WALKMILES = DISTWORKMILES * FREQWALKINI if FREQWALKINI  >= 0.25

gen TOTALTRAVMILES_DER = CARMILES + PUBLICMILES + CYCLEMILES + WALKMILES 

* Ratio of the reported distance to the derived total (missing when the derived total is 0).
gen DISTFACTOR = DISTWORKMILES/TOTALTRAVMILES_DER



*Assigning distance to multi-mode commuters.
* Many assumptions are made here. Amendments are only made when the total frequency of travel
* is >= 1.25, i.e. when the reported modes add up to more than one full journey.
* The one-way distance is then divided between the modes in use with fixed shares; e.g. someone
* who travels by car and on foot is assumed to drive 99% of the way.
* These mileages may get further truncated in the next section.

* a mode counts as in use when its frequency is at least 0.25 ('Occasionally')
local uses_car  "FREQCARINI >= 0.25"
local no_car    "FREQCARINI < 0.25"
local uses_pub  "FREQPUBLICINI >= 0.25"
local no_pub    "FREQPUBLICINI < 0.25"
local uses_cyc  "FREQCYCLEINI >= 0.25"
local no_cyc    "FREQCYCLEINI < 0.25"
local uses_walk "FREQWALKINI >= 0.25"
local no_walk   "FREQWALKINI < 0.25"
local multi     "FREQTOTAL >= 1.25"

* ---- two modes ----
* car + public transport: 50 / 50
local who "`uses_car' & `uses_pub' & `no_cyc' & `no_walk' & `multi'"
replace CARMILES = (0.5*DISTWORKMILES) if `who'
replace PUBLICMILES = (0.5*DISTWORKMILES) if `who'

* car + cycle: 95 / 5
local who "`uses_car' & `uses_cyc' & `no_pub' & `no_walk' & `multi'"
replace CARMILES = (0.95*DISTWORKMILES) if `who'
replace CYCLEMILES = (0.05*DISTWORKMILES) if `who'

* car + walk: 99 / 1
local who "`uses_car' & `uses_walk' & `no_cyc' & `no_pub' & `multi'"
replace CARMILES = (0.99*DISTWORKMILES) if `who'
replace WALKMILES = (0.01*DISTWORKMILES) if `who'

* public transport + cycle: 90 / 10
local who "`uses_pub' & `uses_cyc' & `no_car' & `no_walk' & `multi'"
replace PUBLICMILES = (0.9*DISTWORKMILES) if `who'
replace CYCLEMILES = (0.1*DISTWORKMILES) if `who'

* public transport + walk: 99 / 1
local who "`uses_pub' & `uses_walk' & `no_cyc' & `no_car' & `multi'"
replace PUBLICMILES = (0.99*DISTWORKMILES) if `who'
replace WALKMILES = (0.01*DISTWORKMILES) if `who'

* cycle + walk: 95 / 5
local who "`uses_cyc' & `uses_walk' & `no_car' & `no_pub' & `multi'"
replace CYCLEMILES = (0.95*DISTWORKMILES) if `who'
replace WALKMILES = (0.05*DISTWORKMILES) if `who'

* ---- three modes ----
* car + public transport + cycle: 47.5 / 47.5 / 5
local who "`uses_car' & `uses_pub' & `uses_cyc' & `no_walk' & `multi'"
replace CARMILES = (0.475*DISTWORKMILES) if `who'
replace PUBLICMILES = (0.475*DISTWORKMILES) if `who'
replace CYCLEMILES = (0.05*DISTWORKMILES) if `who'

* car + public transport + walk: 49.5 / 49.5 / 1
local who "`uses_car' & `uses_pub' & `uses_walk' & `no_cyc' & `multi'"
replace CARMILES = (0.495*DISTWORKMILES) if `who'
replace PUBLICMILES = (0.495*DISTWORKMILES) if `who'
replace WALKMILES = (0.01*DISTWORKMILES) if `who'

* car + cycle + walk: 90 / 9 / 1
local who "`uses_car' & `uses_cyc' & `uses_walk' & `no_pub' & `multi'"
replace CARMILES = (0.9*DISTWORKMILES) if `who'
replace CYCLEMILES = (0.09*DISTWORKMILES) if `who'
replace WALKMILES = (0.01*DISTWORKMILES) if `who'

* public transport + cycle + walk: 90 / 9 / 1
local who "`uses_pub' & `uses_cyc' & `uses_walk' & `no_car' & `multi'"
replace PUBLICMILES = (0.9*DISTWORKMILES) if `who'
replace CYCLEMILES = (0.09*DISTWORKMILES) if `who'
replace WALKMILES = (0.01*DISTWORKMILES) if `who'

* ---- all four modes: 45 / 45 / 9 / 1 ----
local who "`uses_car' & `uses_pub' & `uses_cyc' & `uses_walk' & `multi'"
replace CARMILES = (0.45*DISTWORKMILES) if `who'
replace PUBLICMILES = (0.45*DISTWORKMILES) if `who'
replace CYCLEMILES = (0.09*DISTWORKMILES) if `who'
replace WALKMILES = (0.01*DISTWORKMILES) if `who'
		


*Truncate Walking and Cycling
* one-way cycling distance is capped at 20 miles and walking distance at 3 miles
replace CYCLEMILES = 20 if CYCLEMILES !=. & CYCLEMILES > 20
replace WALKMILES = 3 if WALKMILES !=. & WALKMILES > 3


* Daily travel time in hours per mode.
* assume 2 journeys over 5 days at 3mph (walk), cycle at 10mph, car at 45mph and public at 30mph - rescaled to daily basis in 7 day week
local mph_WALK 3
local mph_CYCLE 10
local mph_CAR 45
local mph_PUBLIC 30
foreach mode in WALK CYCLE CAR PUBLIC {
	gen DUR`mode' = `mode'MILES*2*(5/7)/`mph_`mode''
}


***************************
/* RECREATIONAL ACTIVITY */
/* JP- Takes frequencies reported in categories to frequencies per week */
***************************
* The frequency item of each activity is overwritten in place with the values
* 1/4, 2.5/4, 1, 2.5, 4.5 and 7, in that order, for six consecutive entry codes.
*   template 1 (questionnaire version 8 or 9): codes 2-7; codes of 1 or below become 0
*   template 2 (questionnaire version 9 only): codes 3-8; codes of 2 or below become 0
* The reset to 0 is only applied when section C has data (MISSINGC == 2).

local perweek 1/4 2.5/4 1 2.5 4.5 7
local tmpl1 "(QVersion_CLEAN == 8 | QVersion_CLEAN == 9) & template==1"
local tmpl2 "QVersion_CLEAN == 9 & template==2"

foreach var in swimComp_CLEAN swimLeis_CLEAN backPackMountainClimb_CLEAN walkPleasure_CLEAN cyclingRacingRough_CLEAN ///
cyclePleasure_CLEAN mowing_CLEAN waterLawn_CLEAN heavyGardening_CLEAN weedPrune_CLEAN dIY_CLEAN aerobicsHigh_CLEAN ///
aerobicsOther_CLEAN exerciseWeights_CLEAN conditionExercise_CLEAN floorExercise_CLEAN dancing_CLEAN ///
compRun_CLEAN jog_CLEAN bowling_CLEAN tennisBadminton_CLEAN squash_CLEAN tableTennis_CLEAN golf_CLEAN ///
footballRugbyHockey_CLEAN cricket_CLEAN rowing_CLEAN netVolleyBasketBall_CLEAN huntingShootingFish_CLEAN horseBased_CLEAN ///
snookerBillardsDarts_CLEAN musicalInstrumentSing_CLEAN iceSkating_CLEAN sailingWindsurfBoat_CLEAN ///
combatsSports_CLEAN {

	* template 1: lowest codes first, then codes 2-7 in ascending order
	replace `var' = 0 if `var' <= 1 & `tmpl1' & MISSINGC == 2
	forvalues step = 1/6 {
		local freq : word `step' of `perweek'
		local code = `step' + 1
		replace `var' = `freq' if `var' == `code' & `tmpl1'
	}

	* template 2: lowest codes first, then codes 3-8 in ascending order
	replace `var' = 0 if `var' <= 2 & `tmpl2' & MISSINGC == 2
	forvalues step = 1/6 {
		local freq : word `step' of `perweek'
		local code = `step' + 2
		replace `var' = `freq' if `var' == `code' & `tmpl2'
	}

}

***************************
/* JP- assigns maximum duration of activities to allow for overreporting of the duration of an episode */
***************************

*truncated high durations
* Each activity has a maximum number of hours per episode. Hours above the maximum are set
* to the maximum, and the minutes are set to 0 whenever the hours are at or above the
* maximum, so the episode never exceeds it.

local max4  swimComp swimLeis compRun jog bowling tennisBadminton tableTennis horseBased snookerBillardsDarts musicalInstrumentSing iceSkating
local max2  exerciseWeights squash
local max3  mowing waterLawn aerobicsHigh aerobicsOther conditionExercise floorExercise footballRugbyHockey netVolleyBasketBall rowing combatsSports
local max8  backPackMountainClimb walkPleasure cyclingRacingRough cyclePleasure heavyGardening weedPrune dancing cricket
local max10 dIY golf huntingShootingFish sailingWindsurfBoat

foreach limit in 4 2 3 8 10 {
	foreach x of local max`limit' {
		replace `x'Hr_CLEAN = `limit' if `x'Hr_CLEAN >`limit' & `x'Hr_CLEAN !=.
		replace `x'Min_CLEAN = 0 if `x'Hr_CLEAN >=`limit' & `x'Hr_CLEAN !=. 
	}
}


* Imputation of recreation frequency and duration, followed by daily duration per activity.
* The same 35 activities are processed in three passes (kept separate so the new variables
* are created in the same order as before).
local activities swimComp swimLeis backPackMountainClimb walkPleasure cyclingRacingRough cyclePleasure mowing waterLawn heavyGardening ///
weedPrune dIY aerobicsHigh aerobicsOther exerciseWeights conditionExercise floorExercise dancing compRun jog bowling tennisBadminton ///
squash tableTennis golf footballRugbyHockey cricket rowing netVolleyBasketBall huntingShootingFish horseBased snookerBillardsDarts ///
musicalInstrumentSing iceSkating sailingWindsurfBoat combatsSports

*where no frequency is reported (but duration is) assign median frequency from those participating in the activity 
*major assumption
foreach x of local activities {
	
	* in-sample median frequency among those reporting a positive frequency
	su `x'_CLEAN if `x'_CLEAN > 0 & `x'_CLEAN !=., detail
	replace `x'_CLEAN = r(p50) if (`x'_CLEAN <=0 | `x'_CLEAN == .) & ((`x'Hr_CLEAN > 0 & `x'Hr_CLEAN < 20) | (`x'Min_CLEAN > 0 & `x'Min_CLEAN <= 60))
	* still no frequency: treat as not done, provided section C has data
	replace `x'_CLEAN = 0 if (`x'_CLEAN == . | `x'_CLEAN < 0 ) & MISSINGC == 2
}


*where no duration is reported (but frequency is) assign median duration from those participating in the activity 

foreach x of local activities {
	
	* negative codes in the hour and minute items count as zero
	replace `x'Hr_CLEAN = 0 if `x'Hr_CLEAN < 0
	replace `x'Min_CLEAN = 0 if `x'Min_CLEAN < 0
	*generate total hrs of each activity
	gen TOT_`x'Hr = .
	* Fix: a system-missing hour or minute field is counted as 0 in the sum. Previously the
	* plain sum turned missing, so a duration reported in only one of the two fields was
	* thrown away and replaced by the median or by 0.
	replace TOT_`x'Hr = (cond(missing(`x'Hr_CLEAN), 0, `x'Hr_CLEAN) + (cond(missing(`x'Min_CLEAN), 0, `x'Min_CLEAN)/60)) ///
		if ((`x'Hr_CLEAN > 0 & `x'Hr_CLEAN <20) | (`x'Min_CLEAN > 0 & `x'Min_CLEAN <=60))
	* in-sample median episode duration among those reporting one
	su TOT_`x'Hr if TOT_`x'Hr > 0 & TOT_`x'Hr < 20, detail
	replace TOT_`x'Hr = r(p50) if (TOT_`x'Hr  == .) & (`x'_CLEAN > 0 & `x'_CLEAN < 8)
	* neither duration nor frequency: zero hours, provided section C has data
	replace TOT_`x'Hr = 0 if (TOT_`x'Hr == . & MISSINGC == 2)
}


foreach x of local activities {

	*generate total hrs of each activity per day (hours per episode x episodes per week / 7)
	gen TOTDUR_`x' = (TOT_`x'Hr * `x'_CLEAN)/7

}

***************************
/* calculates total reported leisure time activities per day */
***************************

* Collect the 35 per-activity daily durations, in their original order, into one variable list.
local leis_parts
foreach x in swimComp swimLeis backPackMountainClimb walkPleasure cyclingRacingRough cyclePleasure mowing waterLawn heavyGardening ///
weedPrune dIY aerobicsHigh aerobicsOther exerciseWeights conditionExercise floorExercise dancing compRun jog bowling tennisBadminton ///
squash tableTennis golf footballRugbyHockey cricket rowing netVolleyBasketBall huntingShootingFish horseBased snookerBillardsDarts ///
musicalInstrumentSing iceSkating sailingWindsurfBoat combatsSports {
	local leis_parts `leis_parts' TOTDUR_`x'
}

* Row sum (missing parts count as 0), computed only when section C has data;
* records with MISSINGC == 1 are left missing.
egen DURATIONLEIS = rowtotal(`leis_parts') if MISSINGC == 2


***************************
/* re-weights total reported activities per day if total is greater than 18hrs per day */
***************************
*Each adjusted duration starts as a copy of the reported duration
foreach dom in TV COMP JOB CAR PUBLIC CYCLE WALK {
	gen `dom'adj = DUR`dom'
}
gen LEISadj = DURATIONLEIS
gen STAIRadj = DURSTAIRFLIGHT

*Total reported hours per day over all domains; only generated where at least one section flag equals 2
egen DURATIONINI = rowtotal(TVadj COMPadj JOBadj CARadj STAIRadj PUBLICadj CYCLEadj WALKadj LEISadj) ///
	if (MISSINGA == 2 | MISSINGCOMMUT == 2 | MISSINGJOB == 2 | MISSINGC == 2)

*Where more than 18 hrs per day are reported, scale every component by 18/DURATIONINI so that the adjusted total is 18
*NOTE(review): in Stata the comparison DURATIONINI > 18 is also true where DURATIONINI is missing,
*so records without a DURATIONINI end up with missing adjusted durations - confirm this is intended
foreach dom in TV COMP JOB CAR PUBLIC CYCLE WALK {
	replace `dom'adj = DUR`dom'*18/DURATIONINI if DURATIONINI > 18
}
replace STAIRadj = DURSTAIRFLIGHT*18/DURATIONINI if DURATIONINI > 18
*LEISadj was previously left unscaled although every other adjusted duration is re-weighted
replace LEISadj = DURATIONLEIS*18/DURATIONINI if DURATIONINI > 18

*Same re-weighting for each individual leisure activity
foreach act in swimComp swimLeis backPackMountainClimb walkPleasure cyclingRacingRough cyclePleasure mowing waterLawn ///
	heavyGardening weedPrune dIY aerobicsHigh aerobicsOther exerciseWeights conditionExercise floorExercise dancing ///
	compRun jog bowling tennisBadminton squash tableTennis golf footballRugbyHockey cricket rowing netVolleyBasketBall ///
	huntingShootingFish horseBased snookerBillardsDarts musicalInstrumentSing iceSkating sailingWindsurfBoat combatsSports {
	gen TOTDUR_`act'a = TOTDUR_`act' /*variables postfixed with "a" stands for adjusted variable*/
	replace TOTDUR_`act'a = TOTDUR_`act'*18/DURATIONINI if DURATIONINI > 18
}


***************************
/* Calculating sleep time based on max reported hours of activities */
/* AH= Remaining time - Time not accounted for by RPAQ */
***************************

gen UNACCOUNTED = .
gen SLEEP = .

*Stata ranks missing values above every number, so an unguarded DURATIONINI > 18 is also true
*where DURATIONINI is missing. The explicit missing() guard keeps SLEEP and UNACCOUNTED missing
*for records without any reported duration instead of assigning them 6 hrs of sleep.

*More than 18 reported hrs: durations are re-weighted to 18 hrs, leaving 6 hrs sleep and no unaccounted time
replace SLEEP = 6 if DURATIONINI > 18 & !missing(DURATIONINI)
replace UNACCOUNTED = 0 if DURATIONINI > 18 & !missing(DURATIONINI)

*Between 16 and 18 reported hrs: sleep is whatever remains of the 24 hrs
replace SLEEP = 24-DURATIONINI if DURATIONINI <= 18 & DURATIONINI > 16
replace UNACCOUNTED = 24-DURATIONINI - SLEEP if DURATIONINI <= 18 & DURATIONINI > 16

*Up to 16 reported hrs: sleep is fixed at 8 hrs and the rest of the day is unaccounted for
replace SLEEP = 8 if DURATIONINI <= 16
replace UNACCOUNTED = 24 -(DURATIONINI+8) if DURATIONINI <= 16



*Adjusted duration per domain, built from the re-weighted (max 18 hrs per day) component durations.
*Each domain total is only generated where the corresponding section flag equals 2.

*List of adjusted leisure activity durations, assembled once for the LEISURE total
local leis_adjusted
foreach act in swimComp swimLeis backPackMountainClimb walkPleasure cyclingRacingRough cyclePleasure mowing waterLawn ///
	heavyGardening weedPrune dIY aerobicsHigh aerobicsOther exerciseWeights conditionExercise floorExercise dancing ///
	compRun jog bowling tennisBadminton squash tableTennis golf footballRugbyHockey cricket rowing netVolleyBasketBall ///
	huntingShootingFish horseBased snookerBillardsDarts musicalInstrumentSing iceSkating sailingWindsurfBoat combatsSports {
	local leis_adjusted `leis_adjusted' TOTDUR_`act'a
}

*HOME: TV, computer and stairs
egen HOMEtime = rowtotal(TVadj COMPadj STAIRadj) if MISSINGA == 2

*WORK
gen WORKtime = JOBadj if MISSINGJOB == 2

*COMMUTING: car, public transport, cycling and walking
egen COMMUTEtime = rowtotal(CARadj PUBLICadj CYCLEadj WALKadj) if MISSINGCOMMUT == 2

*LEISURE
egen LEIStime = rowtotal(`leis_adjusted') if MISSINGC == 2

*All domains plus assumed sleep; missing domain totals count as zero in rowtotal
egen TOTALtime = rowtotal(HOMEtime WORKtime COMMUTEtime LEIStime SLEEP)
*original note: TOTALtime always = 24 (minus AH)


*****************************************************************
/* Calculation of MET scores as per Ainsworth's PA Compendium */
*****************************************************************
*Each score is an adjusted duration (hrs per day) multiplied by a MET value, i.e. MET-hours per day

/* Home section */
gen SCORETV = TVadj
gen SCORECOMP = COMPadj*1.5
*stairs is halfway between 8 for going up and 3 for going down
gen SCORESTAIRS = STAIRadj*5.5
egen SCOREHOME = rowtotal(SCORETV SCORECOMP SCORESTAIRS)
*rowtotal returns zero when all components are missing, so blank the score where the section flag is 1
replace SCOREHOME = . if MISSINGA == 1


/* Work section */

*worktype assigned as 1 (median in Fenland dataset) where worktype is missing but time in work reported
gen SCOREJOB = . if MISSINGJOB == 1
replace SCOREJOB = 0 if JOBadj == 0
replace SCOREJOB = 1.5*JOBadj if JOBadj > 0 & (Worktype_CLEAN == . | Worktype_CLEAN <0)

*MET value applied to job time for worktype 1, 2, 3 and 4 respectively
local job_mets 1.5 2.3 3.5 5.5
forvalues wt = 1/4 {
	local met : word `wt' of `job_mets'
	replace SCOREJOB = `met'*JOBadj if Worktype_CLEAN == `wt'
}

/* Commuting section */
gen SCORECAR = 1.5*CARadj	/*Compendium says driving 2METs, riding 1MET*/
gen SCOREPUBLIC = PUBLICadj
gen SCORECYCLE = 6*CYCLEadj
gen SCOREWALK = 3.3*WALKadj
egen SCORECOMMUTE = rowtotal(SCORECAR SCOREPUBLIC SCORECYCLE SCOREWALK)
*as for the home section: blank the score where the section flag is 1
replace SCORECOMMUTE = . if MISSINGCOMMUT == 1

*Generate MET scores for LTPA variables
/* Recreation section */
*Lookup table of triples: score suffix, activity name, MET value.
*For each triple: SCORE<suffix> = TOTDUR_<activity>a x MET value
local ltpa_table ///
	LAPSWI swimComp 10 ///
	LESSWI swimLeis 6 ///
	BAKPAK backPackMountainClimb 7 ///
	WALKPLEASURE walkPleasure 3.5 ///
	CYCRAC cyclingRacingRough 10 ///
	CYCPLE cyclePleasure 4 ///
	LAWN mowing 5.5 ///
	WATER waterLawn 1.5 ///
	DIG heavyGardening 6 ///
	WEED weedPrune 4.5 ///
	DIY dIY 4.5 ///
	STEP aerobicsHigh 7 ///
	AERO aerobicsOther 5 ///
	WGHT exerciseWeights 3 ///
	EXER conditionExercise 5.5 ///
	FLOOR floorExercise 4 ///
	DANCE dancing 4.5 ///
	COMRUN compRun 12.5 ///
	JOG jog 7 ///
	BOWL bowling 3 ///
	TENBAD tennisBadminton 6 ///
	SQUASH squash 12 ///
	TABTEN tableTennis 4 ///
	GOLF golf 4.5 ///
	FOOT footballRugbyHockey 8 ///
	CRICK cricket 5 ///
	ROW rowing 7 ///
	NET netVolleyBasketBall 5.5 ///
	FISH huntingShootingFish 3 ///
	HORSE horseBased 4 ///
	SNOOK snookerBillardsDarts 2.5 ///
	MUSIC musicalInstrumentSing 2.7 ///
	SKATE iceSkating 7 ///
	SAIL sailingWindsurfBoat 3 ///
	BOX combatsSports 10

*Walk through the table three tokens at a time, collecting the generated score names for the row total
local leis_scores
gettoken tag ltpa_table : ltpa_table
while "`tag'" != "" {
	gettoken act ltpa_table : ltpa_table
	gettoken met ltpa_table : ltpa_table
	gen SCORE`tag' = TOTDUR_`act'a *`met'
	local leis_scores `leis_scores' SCORE`tag'
	gettoken tag ltpa_table : ltpa_table
}


*Leisure domain total; blanked where the section flag is 1 because rowtotal returns zero for all-missing rows
egen SCORELEIS = rowtotal(`leis_scores')
replace SCORELEIS = . if MISSINGC == 1

*Score Unaccounted time according to getting about mode (assumption being that this is a marker for energy cost of activities not captured)
*1.3 METs where Gettingabout_CLEAN is 2 or 4, otherwise 1 MET
gen SCORE_UNACCOUNTED = UNACCOUNTED * cond(inlist(Gettingabout_CLEAN, 2, 4), 1.3, 1)


/* Total score */

*Gross MET-hours per day over the four domains, without and with the unaccounted time; only where MISSING equals 2
local awake_scores SCOREHOME SCOREJOB SCORECOMMUTE SCORELEIS
egen TOTMETHRS = rowtotal(`awake_scores') if MISSING == 2 /*awake-time only*/
egen TOTMETHRS_w_UNACCtime = rowtotal(`awake_scores' SCORE_UNACCOUNTED) if MISSING == 2 /*awake-time only*/

egen TOTtime = rowtotal(HOMEtime WORKtime COMMUTEtime LEIStime)

*Net scores: subtract 1 MET for every hour counted.
*ACTMETS subtracts over the reported time, ACTMETS_w_UNACCtime over all hours outside sleep.
local rest_met 1
gen ACTMETS = TOTMETHRS - TOTtime*`rest_met'
gen ACTMETS_w_UNACCtime = TOTMETHRS_w_UNACCtime - (24-SLEEP)*`rest_met' 
*the two scores differ only through the unaccounted time, which is scored at 1 MET or at 1.3 METs if the person reports getting about actively

*Score variable feeding each domain (the work domain uses SCOREJOB)
local score_HOME SCOREHOME
local score_WORK SCOREJOB
local score_LEIS SCORELEIS
local score_COMMUTE SCORECOMMUTE

*Gross domain scores
foreach dom in HOME WORK LEIS COMMUTE {
	gen `dom'_METS = `score_`dom''
}

*Net domain scores: gross score minus 1 MET for each hour spent in the domain
foreach dom in HOME WORK LEIS COMMUTE {
	gen `dom'_ACTMETS = `score_`dom'' - `dom'time
}

*Convert MET HRS per day to kJ/kg/day. Multiply by 60 to get to MET minutes (which cancels out minutes), multiply by 3.5 * 20.35 to convert to J.
local methr_to_kj 3.5 * 20.35 * 60 / 1000
gen PAEE = ACTMETS * `methr_to_kj'
foreach dom in HOME WORK LEIS COMMUTE {
	gen `dom'_PAEE = `dom'_ACTMETS * `methr_to_kj'
}

*ENERGY SPENT AT DIFFERENT INTENSITIES
*Job energy and job time are added to exactly one intensity band, chosen by Worktype_CLEAN
*(1 = sedentary, 2 = light, 3 = moderate, 4 = vigorous).
*Where Worktype_CLEAN is missing or negative but job time is reported, the work section scores the job
*at 1.5 METs (i.e. as worktype 1). Those records are therefore allocated to the sedentary band here;
*previously their job energy and time were part of the totals but of none of the intensity bands.
local job_is_sed "Worktype_CLEAN == 1 | ((Worktype_CLEAN == . | Worktype_CLEAN < 0) & JOBadj > 0 & !missing(JOBadj))"

	*SPA: <=1.5 METs, not including sleep
	local sed_scores SCORETV SCORECOMP SCORECAR SCOREPUBLIC
	egen SED_INTENSITY  = rowtotal(`sed_scores')
	egen SED_INTENSITY2 = rowtotal(`sed_scores' SCOREJOB)
	replace SED_INTENSITY = SED_INTENSITY2 if `job_is_sed'
	drop SED_INTENSITY2

	*LPA: 1.5001-2.99 METs
	local light_scores SCOREWATER SCORESNOOK SCOREMUSIC
	egen LIGHT_INTENSITY  = rowtotal(`light_scores')
	egen LIGHT_INTENSITY2 = rowtotal(`light_scores' SCOREJOB)
	replace LIGHT_INTENSITY = LIGHT_INTENSITY2 if Worktype_CLEAN == 2
	drop LIGHT_INTENSITY2

	*MPA: 3-5.99 METs
	*includes digging and commute cycling
	local mod_scores SCORESTAIRS SCOREWALK SCORECYCLE SCORELESSWI SCOREWALKPLEASURE SCORECYCPLE ///
		SCORELAWN SCOREDIG SCOREWEED SCOREDIY SCOREAERO SCOREWGHT SCOREEXER SCOREFLOOR SCOREDANCE SCOREBOWL ///
		SCORETENBAD SCORETABTEN SCOREGOLF SCORECRICK SCORENET SCOREFISH SCOREHORSE SCORESAIL
	egen MODERATE_INTENSITY  = rowtotal(`mod_scores')
	egen MODERATE_INTENSITY2 = rowtotal(`mod_scores' SCOREJOB)
	replace MODERATE_INTENSITY = MODERATE_INTENSITY2 if Worktype_CLEAN == 3
	drop MODERATE_INTENSITY2

	*VPA: >=6 METs
	local vig_scores SCORELAPSWI SCOREBAKPAK SCORECYCRAC SCORESTEP SCORECOMRUN SCOREJOG SCORESQUASH SCOREFOOT ///
		SCOREROW SCORESKATE SCOREBOX
	egen VIGOROUS_INTENSITY  = rowtotal(`vig_scores')
	egen VIGOROUS_INTENSITY2 = rowtotal(`vig_scores' SCOREJOB)
	replace VIGOROUS_INTENSITY = VIGOROUS_INTENSITY2 if Worktype_CLEAN == 4
	drop VIGOROUS_INTENSITY2

* TIME SPENT AT DIFFERENT INTENSITIES
*Same band allocation as for energy, applied to the adjusted durations (JOBadj instead of SCOREJOB)
	
	*Time spent in SPA (<=1.5 METs, not including sleep)
	local sed_times TVadj COMPadj CARadj PUBLICadj
	egen SEDtime = rowtotal(`sed_times')
	egen SEDtime2 = rowtotal(`sed_times' JOBadj)
	replace SEDtime = SEDtime2 if `job_is_sed'
	drop SEDtime2

	
	*Time spent in LPA: 1.5001-2.99 METs
	local light_times TOTDUR_waterLawna TOTDUR_snookerBillardsDartsa TOTDUR_musicalInstrumentSinga
	egen LIGHTtime  = rowtotal(`light_times')
	egen LIGHTtime2 = rowtotal(`light_times' JOBadj)
	replace LIGHTtime = LIGHTtime2 if Worktype_CLEAN == 2
	drop LIGHTtime2

	*Time spent in MPA: 3-5.99 METs
	local mod_times STAIRadj WALKadj CYCLEadj TOTDUR_swimLeisa TOTDUR_walkPleasurea TOTDUR_cyclePleasurea TOTDUR_mowinga ///
		TOTDUR_heavyGardeninga TOTDUR_weedPrunea TOTDUR_dIYa TOTDUR_aerobicsOthera TOTDUR_exerciseWeightsa ///
		TOTDUR_conditionExercisea TOTDUR_floorExercisea TOTDUR_dancinga TOTDUR_bowlinga TOTDUR_tennisBadmintona ///
		TOTDUR_tableTennisa TOTDUR_golfa TOTDUR_cricketa TOTDUR_netVolleyBasketBalla TOTDUR_huntingShootingFisha ///
		TOTDUR_horseBaseda TOTDUR_sailingWindsurfBoata
	egen MODERATEtime = rowtotal(`mod_times')
	egen MODERATEtime2 = rowtotal(`mod_times' JOBadj)
	replace MODERATEtime = MODERATEtime2 if Worktype_CLEAN == 3
	drop MODERATEtime2

	*Time spent in VPA: >=6 METs
	local vig_times TOTDUR_swimCompa TOTDUR_backPackMountainClimba TOTDUR_cyclingRacingRougha ///
		TOTDUR_aerobicsHigha TOTDUR_compRuna TOTDUR_joga TOTDUR_squasha TOTDUR_footballRugbyHockeya ///
		TOTDUR_rowinga TOTDUR_iceSkatinga TOTDUR_combatsSportsa
	egen VIGOROUStime = rowtotal(`vig_times')
	egen VIGOROUStime2 = rowtotal(`vig_times' JOBadj)
	replace VIGOROUStime = VIGOROUStime2 if Worktype_CLEAN == 4
	drop VIGOROUStime2

*LABELLING OF KEY VARIABLES
*Stata variable labels hold at most 80 characters, so every label below is kept within that limit.

*Overall totals
label var TOTMETHRS "Total reported duration (hours) of activity times intensity (MET) [METhrs/d]"
*reworded to balance the parentheses while staying within 80 characters
label var TOTMETHRS_w_UNACCtime  "Total reported plus unaccounted duration (hrs) times intensity (MET) [METhrs/d]"
label var TOTtime "Total reported duration (hours) of activity [hrs/d]"
label var TOTALtime "Total reported duration (hours) of activity + assumed sleep [hrs/d]"
label var ACTMETS "Total activity energy expenditure discounting resting [net METhrs/d]"
*the previous wording was 81 characters long, so the closing bracket was cut off
label var ACTMETS_w_UNACCtime "Activity EE incl AEE of unaccounted time for active getting about [net METhrs/d]"
label var PAEE "Physical activity energy expenditure [kJ/kg/d]"

*Domain-specific variables: pairs of variable prefix and label wording
local domain_names HOME Home WORK Work LEIS Leisure COMMUTE Commute
gettoken dom domain_names : domain_names
while "`dom'" != "" {
	gettoken wording domain_names : domain_names
	label var `dom'_METS "`wording' domain energy expenditure [METhrs/d]"
	label var `dom'_ACTMETS "`wording' domain activity energy expenditure [net METhrs/d]"
	label var `dom'_PAEE "`wording' domain activity energy expenditure [kJ/kg/d]"
	gettoken dom domain_names : domain_names
}

*Energy by intensity band
label var SED_INTENSITY "Sedentary behavior energy expenditure [METhrs/d]"
label var LIGHT_INTENSITY "Light intensity energy expenditure [METhrs/d]"
label var MODERATE_INTENSITY "Moderate intensity energy expenditure [METhrs/d]"
label var VIGOROUS_INTENSITY "Vigorous intensity energy expenditure [METhrs/d]"

*Time by intensity band
label var SEDtime "Time spent sedentary, excluding sleep [hrs/d]"
label var LIGHTtime "Time spent at light intensity activity [hrs/d]"
label var MODERATEtime "Time spent at moderate intensity activity [hrs/d]"
label var VIGOROUStime "Time spent at vigorous intensity activity [hrs/d]"

***************************
*END
***************************

*keep serno TOTMETHRS TOTMETHRS_w_UNACCtime TOTtime TOTALtime ACTMETS SED_INTENSITY LIGHT_INTENSITY MODERATE_INTENSITY VIGOROUS_INTENSITY SEDtime LIGHTtime MODERATEtime VIGOROUStime HOME_METS WORK_METS LEIS_METS COMMUTE_METS
*Move the identifying variables to the front of the dataset
order StudyID template 

cd "`FOLDER'"

*The file name is quoted, as in the use command at the top of the script, so that an input file name containing spaces still saves correctly
save "`OUTPUT_FILE'.dta", replace
*This is now the most up to date version of RPAQ data with METs generated that is to be used for analyses. 

*List records whose work or leisure PAEE exceeds 120 kJ/kg/d so that they can be checked by eye
set more off
di "Listing extreme PAEE values"
list StudyID PAEE WORK_PAEE Worktype_CLEAN WORKtime LEIStime LIGHTtime MODERATEtime VIGOROUStime if WORK_PAEE > 120 & !missing(WORK_PAEE)
list StudyID PAEE LEIS_PAEE LEIStime LIGHTtime MODERATEtime VIGOROUStime if LEIS_PAEE > 120 & !missing(LEIS_PAEE)


/*
*******************************************************
/* Calculation of PAEE by the Henry formula (kJ/day) */
*******************************************************
/* Men */
gen rmrrpaq = 1000*((.118*weight)+((3.59*height)/100)-1.55) if sex == 1 & age > 0 & age <= 3
replace rmrrpaq = 1000*((.0632*weight)+((1.31*height)/100)+1.28) if sex == 1 & age > 3 & age <= 10
replace rmrrpaq = 1000*((.0651*weight)+((1.11*height)/100)+1.25) if sex == 1 & age > 10 & age <= 18
replace rmrrpaq = 1000*((.06*weight)+((1.31*height)/100)+.473) if sex == 1 & age > 18 & age <= 30
replace rmrrpaq = 1000*((.0476*weight)+((2.26*height)/100)-.574) if sex == 1 & age > 30 & age <= 60
replace rmrrpaq = 1000*((.0478*weight)+((2.26*height)/100)-1.07) if sex == 1 & age > 60 & age <= 150

/* Women */
replace rmrrpaq = 1000*((.127*weight)+((2.94*height)/100)-1.2) if sex == 2 & age > 0 & age <= 3
replace rmrrpaq = 1000*((.0666*weight)+((.878*height)/100)+1.46) if sex == 2 & age > 3 & age <= 10
replace rmrrpaq = 1000*((.0393*weight)+((1.04*height)/100)+1.93) if sex == 2 & age > 10 & age <= 18
replace rmrrpaq = 1000*((.0433*weight)+((2.57*height)/100)-1.18) if sex == 2 & age > 18 & age <= 30
replace rmrrpaq = 1000*((.0342*weight)+((2.1*height)/100)-.0486) if sex == 2 & age > 30 & age <= 60
replace rmrrpaq = 1000*((.0356*weight)+((1.76*height)/100)-.0448) if sex == 2 & age > 60 & age <= 150

gen PAEEHOME = (SCOREHOME*weight*4.263)-(rmrrpaq*HOME/24)
gen PAEEJOB = (SCOREJOB*weight*4.263)-(rmrrpaq*JOB/24)
gen PAEETRANSPORT = (SCORETRANSPORT*weight*4.263)-(rmrrpaq*TRANSPORT/24)
gen PAEERECREA = (SCORERECREA*weight*4.263)-(rmrrpaq*RECREA/24)
gen PAEE = (SCORE1*weight*4.263)-(rmrrpaq*DURATION/24)

*/
